Thoughts from recent conversations (12/15/2021):
Game plan:
load('lvl0_snapshots/mea_acute_lvl0_2020-07-29.RData')
# filter out wllq == 0
dat <- mea_acute_lvl0[wllq == 1]
rm(mea_acute_lvl0)
# assign the cndx
dat[, conc := signif(conc, 3)]
dat[is.na(conc), .N, by = .(wllt, spid)] # this is all fine
dat[, cndx := frank(conc, ties.method = 'dense'), by = .(apid, spid, wllt, srcf, apid)] # dense will collapse the rep's
# Calculate the bvals
dat[wllt == 'n', .N, by = spid] # most DMSO, some water, no Media
dat[, bval.n2low := median(rval[wllt == 'n' | (wllt == 't' & cndx %in% c(1,2))], na.rm = T), by = .(acnm, apid)]
dat[, bval.2low := median(rval[(wllt == 't' & cndx %in% c(1,2))], na.rm = T), by = .(acnm, apid)]
dat[, bval.n := median(rval[wllt == 'n'], na.rm = T), by = .(acnm, apid)]
# Questions:
# - How does the bval of 2 low only compare to bval of n and 2 low?
# Is either consistently lower or higher?
# Is the tail of either thicker or thinner? (e.g. at least fewer really low balls with 2 low only?)
# Another question:
# should we just filter out edgewells?
# Start with the classic wmfr
acnmi <- 'CCTE_Shafer_MEA_acute_firing_rate_mean_weighted'
ggplot(dat[acnm == acnmi], aes(x = bval.n2low, y = bval.2low)) +
geom_point()+
geom_abline(slope = 1, intercept = 0)+
ggtitle(paste0(acnmi,'\nBval of 2 Lowest conc\'s only vs\nBval of n wells + 2 lowest conc\'s'))
Observations:
Let’s check out another endpoint, one that has some low bval’s
dat[, .(median(bval.n2low)), by = .(acnm)][order(V1)]
acnmi <- 'CCTE_Shafer_MEA_acute_cross_correlation_area'
ggplot(dat[acnm == acnmi], aes(x = bval.n2low, y = bval.2low)) +
geom_point()+
geom_abline(slope = 1, intercept = 0)+
ggtitle(paste0(acnmi,'\nBval of 2 Lowest conc\'s only vs\nBval of n wells + 2 lowest conc\'s'))
Oh wow, even here it’s not really any better!!
How to quickly visually verify that for all endpoints the bval of 2 low only isn’t really any better?
Not sure, but let’s do a quick t-test
# Get a table with 1 row per apid
bval.tb <- dat[, unique(.SD), .SDcols = c('acnm','apid','bval.n2low','bval.2low')]
t.test.tb <- data.table()
for (acnmi in unique(dat$acnm)) {
res <- t.test(x = bval.tb[acnm == acnmi, bval.n2low],
y = bval.tb[acnm == acnmi, bval.2low],
alternative = 'two.sided',
mu = 0,
paired = TRUE,
var.equal = FALSE)
add.tb <- data.table(acnm = acnmi,
statistic = res$statistic,
parameter = res$parameter,
p.value = res$p.value,
conf.int.lb = res$conf.int[1],
conf.int.ub = res$conf.int[2],
mean_of_the_differences = res$estimate[1])
t.test.tb <- rbind(t.test.tb, add.tb)
}
t.test.tb
p <- ggplot(t.test.tb[!grepl('(LDH)|(AB)',acnm)], aes(text = acnm, x = mean_of_the_differences, y = p.value)) +
geom_point()+
scale_y_log10()+
geom_hline(yintercept = 0.05)+
ggtitle(paste0('Welch Two Sample Paired t-test of bval of n wells + 2 low concs vs bval of 2 low concs only\nfor the\n ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')
# Do some ranking of the apids
use.bval.tb <- bval.tb[!grepl('(LDH)|(AB)',acnm)]
use.bval.tb[, apid_rank_in_acnm := frank(bval.n2low, ties.method = 'average'), by = .(acnm)]
use.bval.tb[, apid_med_rank := median(apid_rank_in_acnm), by = .(apid)]
use.bval.tb[, apid_min_rank := min(apid_rank_in_acnm), by = .(apid)]
use.bval.tb[, apid_med_bval.n2low := median(bval.n2low), by = .(apid)]
use.bval.tb[, apid_min_bval.n2low := min(bval.n2low), by = .(apid)]
apid.t.test.tb <- data.table()
for (apidi in unique(dat$apid)) {
res <- t.test(x = use.bval.tb[apid == apidi, bval.n2low],
y = use.bval.tb[apid == apidi, bval.2low],
alternative = 'two.sided',
mu = 0,
paired = TRUE,
var.equal = FALSE)
add.tb <- data.table(apid = apidi,
statistic = res$statistic,
parameter = res$parameter,
p.value = res$p.value,
conf.int.lb = res$conf.int[1],
conf.int.ub = res$conf.int[2],
mean_of_the_differences = res$estimate[1])
apid.t.test.tb <- rbind(apid.t.test.tb, add.tb)
}
apid.t.test.tb <- merge(apid.t.test.tb,
use.bval.tb[, unique(.SD), .SDcols = c('apid','apid_med_rank','apid_min_rank',
'apid_med_bval.n2low','apid_min_bval.n2low')],
by = 'apid', all.x = T)
apid.t.test.tb
p <- ggplot(apid.t.test.tb, aes(text = apid, x = apid_min_rank, y = p.value)) +
geom_point(aes(color = mean_of_the_differences))+
scale_y_log10()+
geom_hline(yintercept = 0.05)+
ggtitle(paste0('Welch Two Sample Paired t-test of\nbval of n wells + 2 low concs vs bval of 2 low concs only\nfor the ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')
What if I check it out by just the raw min bval, rather than the basing it on the rank, which doesn’t actually tell me if the apid is concerningly low?
p <- ggplot(apid.t.test.tb, aes(text = apid, x = apid_min_bval.n2low, y = p.value)) +
geom_point(aes(color = mean_of_the_differences))+
scale_y_log10()+
geom_hline(yintercept = 0.05)+
ggtitle(paste0('Welch Two Sample Paired t-test of\nbval of n wells + 2 low concs vs bval of 2 low concs only\nfor the ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')
Sad, the apid with the smallest min bvals are still not really helped by using 2low only (at least, on average).
Maybe doing the t-test and looking at this aggregate level is not really helpful?